Defining the input and output files
# Clean the working environment
rm(list = ls())
knitr::opts_chunk$set(echo = TRUE)
empirical_species <- "German Shepherd"
document_title <- paste(empirical_species," Pipeline Results")
MAF_pruning_used <- TRUE
N_e <- 50
# document_sub <- paste("MAF 0.05 used, but only for H_E-computation, N_e=", N_e)
if (MAF_pruning_used == FALSE) {
document_sub <- paste("No MAF-based pruning used, N_e =", N_e)
} else {
document_sub <- paste("MAF-based pruning used, N_e =", N_e)
}
############################################
# Parameters used for displaying the result
############################################
ROH_frequency_decimals <- 1
H_e_values_decimals <- 5
F_ROH_values_decimals <- 3
####################################
# Defining the input files
####################################
Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")
Sweep_test_dir <- Sys.getenv("Sweep_test_dir")
###############
## Empirical ###
###############
### ROH hotspots ###
Empirical_data_ROH_hotspots_dir <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Empirical_data_F_ROH_dir <- Sys.getenv("Empirical_data_F_ROH_dir")
### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")
###############
## Simulated ###
###############
### Causative variant ###
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")
pruned_replicates_count_dir <- Sys.getenv("pruned_replicates_count_dir")
Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
causative_variant_windows_H_e_dir <- Sys.getenv("causative_variant_windows_H_e_dir")
### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")
Selection_model_ROH_hotspots_dir <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")
### Inbreeding coefficient ###
Neutral_model_F_ROH_dir <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir <- Sys.getenv("Selection_model_F_ROH_dir")
### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")
histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue"
selection_model_color <- "purple"
output_dir <- Sys.getenv("Pipeline_results_output_dir")
# Sys.getenv()
# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
| Selection_coefficient | Fixation_probability | Avg_Fixation_time | Min_fixation_time | Max_fixation_time |
|---|---|---|---|---|
| s=0.05 | 0.3 | 34.15 | 25 | 39 |
| s=0.1 | 1.5 | 33.70 | 25 | 39 |
| s=0.2 | 16.0 | 32.65 | 24 | 39 |
| s=0.3 | 27.4 | 26.35 | 17 | 39 |
| s=0.4 | 41.7 | 22.50 | 12 | 35 |
| s=0.5 | 54.1 | 16.15 | 8 | 23 |
| s=0.6 | 54.1 | 12.15 | 8 | 16 |
| s=0.7 | 41.7 | 9.10 | 7 | 13 |
| s=0.8 | 52.6 | 8.05 | 6 | 12 |
| Selection_coefficient | Avg_Length_Mb | Avg_window_freq | Min_freq | Max_freq | Avg_freq_variant_100k_window | |
|---|---|---|---|---|---|---|
| 4 | s=0.3 | 4.592906 | 10.7 | 0 | 68 | 10.8 |
| 2 | s=0.1 | 5.093953 | 38.6 | 0 | 84 | 39.6 |
| 1 | s=0.05 | 5.763953 | 29.8 | 0 | 82 | 29.7 |
| 6 | s=0.5 | 5.793953 | 4.4 | 0 | 38 | 3.6 |
| 5 | s=0.4 | 6.965000 | 8.3 | 0 | 66 | 7.6 |
| 3 | s=0.2 | 7.050000 | 24.0 | 0 | 80 | 23.8 |
| 7 | s=0.6 | 7.953953 | 9.1 | 0 | 68 | 8.7 |
| 8 | s=0.7 | 9.608953 | 7.9 | 0 | 50 | 7.5 |
| 9 | s=0.8 | 10.898953 | 6.5 | 0 | 52 | 5.8 |
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
## ROH-hotspot selection testing results://n
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 28.7 |
| s=0.05 | 33.2 |
| s=0.1 | 35.0 |
| s=0.5 | 35.1 |
| s=0.4 | 35.2 |
| s=0.2 | 35.8 |
| s=0.3 | 36.8 |
| s=0.7 | 37.6 |
| s=0.6 | 38.5 |
| s=0.8 | 39.4 |
| Empirical | 75.0 |
## Overall Average Avg_F_ROH for german_shepherd : 0.2753012 //n
## Point estimate F_ROH across all 20 simulations: 0.2382806 //n
## [1] "Bootstrap 95% Confidence Interval: [0.225944707258446, 0.250616452741554]"
## Point estimate F_ROH across all 20 simulations for selection_model_s005_chr3 : 0.2730208 //n[1] "Bootstrap 95% Confidence Interval: [0.257821701861417, 0.288219838138583]"
## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.2817853 //n[1] "Bootstrap 95% Confidence Interval: [0.262926084924311, 0.300644575075689]"
## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.2916789 //n[1] "Bootstrap 95% Confidence Interval: [0.27582288465812, 0.30753487534188]"
## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.2949622 //n[1] "Bootstrap 95% Confidence Interval: [0.275270344867574, 0.314654015132426]"
## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.2765845 //n[1] "Bootstrap 95% Confidence Interval: [0.257329413481911, 0.295839626518089]"
## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.2757632 //n[1] "Bootstrap 95% Confidence Interval: [0.263069394392445, 0.288457025607555]"
## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.2953692 //n[1] "Bootstrap 95% Confidence Interval: [0.280072875313516, 0.310665584686484]"
## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.2830673 //n[1] "Bootstrap 95% Confidence Interval: [0.261165531872551, 0.304969108127449]"
## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.2847513 //n[1] "Bootstrap 95% Confidence Interval: [0.250907729600213, 0.318594910399787]"
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Neutral | 0.238 | 0.226 | 0.251 |
| s=0.05 | 0.273 | 0.258 | 0.288 |
| Empirical | 0.275 | NA | NA |
| s=0.5 | 0.276 | 0.263 | 0.288 |
| s=0.4 | 0.277 | 0.257 | 0.296 |
| s=0.1 | 0.282 | 0.263 | 0.301 |
| s=0.7 | 0.283 | 0.261 | 0.305 |
| s=0.8 | 0.285 | 0.251 | 0.319 |
| s=0.2 | 0.292 | 0.276 | 0.308 |
| s=0.3 | 0.295 | 0.275 | 0.315 |
| s=0.6 | 0.295 | 0.280 | 0.311 |
## Models where empirical F_ROH is within CI:
## [1] "s=0.05" "s=0.1" "s=0.3" "s=0.4" "s=0.5" "s=0.7" "s=0.8"
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.2" "s=0.6" "Neutral"
## Uncommented because change of analysis
## Point estimate H_e across all 20 simulations for s005_chr3 : 0.3136506 //n[1] "Bootstrap 95% Confidence Interval: [0.274729948288615, 0.35257127281979]"
## Point estimate H_e across all 20 simulations for s01_chr3 : 0.2437236 //n[1] "Bootstrap 95% Confidence Interval: [0.208784537367758, 0.278662647234207]"
## Point estimate H_e across all 20 simulations for s02_chr3 : 0.287157 //n[1] "Bootstrap 95% Confidence Interval: [0.250885743940252, 0.323428304819524]"
## Point estimate H_e across all 20 simulations for s03_chr3 : 0.2640354 //n[1] "Bootstrap 95% Confidence Interval: [0.213138129976104, 0.314932680147393]"
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.3228381 //n[1] "Bootstrap 95% Confidence Interval: [0.289478390746543, 0.356197709502665]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.3361865 //n[1] "Bootstrap 95% Confidence Interval: [0.292957745879232, 0.379415174481695]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.3358466 //n[1] "Bootstrap 95% Confidence Interval: [0.288658400834631, 0.383034885590995]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.3380687 //n[1] "Bootstrap 95% Confidence Interval: [0.28983399187073, 0.386303426756603]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.3360891 //n[1] "Bootstrap 95% Confidence Interval: [0.292090306442711, 0.380087917137017]"
| Model | H_e_5th_percentile | Lower_CI | Upper_CI |
|---|---|---|---|
| s01_chr3 | 0.15285 | 0.14262 | 0.16309 |
| s08_chr3 | 0.15927 | 0.14455 | 0.17400 |
| s02_chr3 | 0.15996 | 0.15177 | 0.16815 |
| s07_chr3 | 0.16193 | 0.15085 | 0.17300 |
| s03_chr3 | 0.16287 | 0.15123 | 0.17452 |
| s04_chr3 | 0.16637 | 0.15725 | 0.17549 |
| Neutral | 0.16683 | 0.16108 | 0.17258 |
| s005_chr3 | 0.16904 | 0.16161 | 0.17647 |
| s05_chr3 | 0.17357 | 0.16482 | 0.18232 |
| s06_chr3 | 0.17614 | 0.16712 | 0.18516 |
| Empirical | NA | NA | NA |
##
## ROH-hotspot threshold comparison between the different datasets
| Model | Avg_ROH_hotspot_threshold |
|---|---|
| Neutral | 28.7 |
| s=0.05 | 33.2 |
| s=0.1 | 35.0 |
| s=0.5 | 35.1 |
| s=0.4 | 35.2 |
| s=0.2 | 35.8 |
| s=0.3 | 36.8 |
| s=0.7 | 37.6 |
| s=0.6 | 38.5 |
| s=0.8 | 39.4 |
| Empirical | 75.0 |
| Model | F_ROH | Lower_CI | Upper_CI |
|---|---|---|---|
| Neutral | 0.238 | 0.226 | 0.251 |
| s=0.05 | 0.273 | 0.258 | 0.288 |
| Empirical | 0.275 | NA | NA |
| s=0.5 | 0.276 | 0.263 | 0.288 |
| s=0.4 | 0.277 | 0.257 | 0.296 |
| s=0.1 | 0.282 | 0.263 | 0.301 |
| s=0.7 | 0.283 | 0.261 | 0.305 |
| s=0.8 | 0.285 | 0.251 | 0.319 |
| s=0.2 | 0.292 | 0.276 | 0.308 |
| s=0.3 | 0.295 | 0.275 | 0.315 |
| s=0.6 | 0.295 | 0.280 | 0.311 |
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.1610771"
| Name | Under_selection | Window_based_Average_H_e |
|---|---|---|
| Hotspot_chr3_window_1 | Yes | 0.10706 |
| Hotspot_chr3_window_3 | Yes | 0.12371 |
| Hotspot_chr3_window_2 | Yes | 0.14453 |
| Hotspot_chr17_window_2 | Yes | 0.14866 |
| Hotspot_chr17_window_1 | Yes | 0.15285 |
| Hotspot_chr19_window_1 | No | 0.16270 |
## [1] "ROH-hotspots under selection:"
| Name | Under_selection | Window_based_Average_H_e |
|---|---|---|
| Hotspot_chr3_window_1 | Yes | 0.10706 |
| Hotspot_chr3_window_3 | Yes | 0.12371 |
| Hotspot_chr3_window_2 | Yes | 0.14453 |
| Hotspot_chr17_window_2 | Yes | 0.14866 |
| Hotspot_chr17_window_1 | Yes | 0.15285 |
| Model | H_e | Lower_CI | Upper_CI | Under_Selection |
|---|---|---|---|---|
| Neutral | 0.16683 | 0.16108 | 0.17258 | No |
| s=0.1 | 0.24372 | 0.20878 | 0.27866 | No |
| s=0.3 | 0.26404 | 0.21314 | 0.31493 | No |
| s=0.2 | 0.28716 | 0.25089 | 0.32343 | No |
| s=0.05 | 0.31365 | 0.27473 | 0.35257 | No |
| s=0.4 | 0.32284 | 0.28948 | 0.35620 | No |
| s=0.6 | 0.33585 | 0.28866 | 0.38303 | No |
| s=0.8 | 0.33609 | 0.29209 | 0.38009 | No |
| s=0.5 | 0.33619 | 0.29296 | 0.37942 | No |
| s=0.7 | 0.33807 | 0.28983 | 0.38630 | No |
*** Behöver bootstrap av 5th percentiles från neutral model
| Model | Lengths_Mb | Avg_ROH_Freq |
|---|---|---|
| Hotspot_chr3_window_1 | 10.900000 | 81.3 |
| s=0.8 | 10.898953 | 6.5 |
| s=0.7 | 9.608953 | 7.9 |
| s=0.6 | 7.953953 | 9.1 |
| s=0.2 | 7.050000 | 24.0 |
| s=0.4 | 6.965000 | 8.3 |
| s=0.5 | 5.793953 | 4.4 |
| s=0.05 | 5.763953 | 29.8 |
| s=0.1 | 5.093953 | 38.6 |
| s=0.3 | 4.592906 | 10.7 |
| Hotspot_chr3_window_2 | 3.200000 | 76.3 |
| Hotspot_chr3_window_3 | 2.700000 | 77.6 |
| Hotspot_chr17_window_1 | 2.000000 | 76.4 |
| Hotspot_chr17_window_2 | 0.600000 | 76.1 |
## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## character(0)
## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## character(0)